{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy.linalg as lin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载数据函数\n",
    "def loadData(filename):\n",
    "    data = pd.read_csv(filename, sep='\\s+', header=None)\n",
    "    data = data.as_matrix()\n",
    "    col, row = data.shape\n",
    "    X = data[:, 0: row-1]\n",
    "    Y = data[:, row-1:row]\n",
    "    return X, Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 决策树桩\n",
    "def decision_stump(X, Y, thres, U):\n",
    "    row, col = X.shape\n",
    "    r, c = thres.shape; besterr = 1\n",
    "    btheta = 0; bs = 0; index = 0\n",
    "    for i in range(col):\n",
    "        Yhat1 = np.sign(np.tile(X[:, i:i+1], (1, r)).T-thres[:, i:i+1]).T\n",
    "        err1 = (Yhat1!=Y).T.dot(U)\n",
    "        err2 = (-1*Yhat1!=Y).T.dot(U)\n",
    "        s = 1 if np.min(err1) < np.min(err2) else -1\n",
    "        if s == 1 and np.min(err1) < besterr:\n",
    "            besterr = np.min(err1); bs = 1\n",
    "            index = i; btheta = thres[np.argmin(err1), i]\n",
    "        if s == -1 and np.min(err2) < besterr:\n",
    "            besterr = np.min(err2); bs = -1\n",
    "            index = i; btheta = thres[np.argmin(err2), i]\n",
    "    return besterr, btheta, bs, index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# AdaBoost---Stump 算法\n",
    "# 需要说明: 与PPT上有点不同，始终保证sum(U)=1\n",
    "def ada_boost(X, Y, T):\n",
    "    row, col = X.shape\n",
    "    U = np.ones((row, 1))/row\n",
    "    Xsort = np.sort(X, 0)\n",
    "    thres = (np.r_[Xsort[0:1, :] - 0.1, Xsort[0:row - 1, :]] + Xsort) / 2\n",
    "    theta = np.zeros((T,)); s = np.zeros((T,));\n",
    "    index = np.zeros((T,)).astype(int); alpha = np.zeros((T,))\n",
    "    err = np.zeros((T,))\n",
    "    for i in range(T):\n",
    "        err[i], theta[i], s[i], index[i] = decision_stump(X, Y, thres, U)\n",
    "        yhat = s[i]*np.sign(X[:, index[i]:index[i]+1]-theta[i])\n",
    "        delta = np.sqrt((1-err[i])/err[i])\n",
    "        U[yhat==Y] /= delta\n",
    "        U[yhat!=Y] *= delta\n",
    "# Q14运行时，解除注释\n",
    "#        if i == T-1:\n",
    "#            print('sum(U): ', np.sum(U))\n",
    "        alpha[i] = np.log(delta)\n",
    "        U /= np.sum(U)\n",
    "# Q15运行时，解除注释\n",
    "#    print('最小的eta: ', np.min(err))\n",
    "    return theta, index, s, alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测函数\n",
    "def predict(X, theta, index, s, alpha):\n",
    "    row, col = X.shape\n",
    "    num = len(theta)\n",
    "    ytemp = np.tile(s.reshape((1, num)), (row, 1))*np.sign(X[:, index]-theta.reshape((1, num)))\n",
    "    yhat = np.sign(ytemp.dot(alpha.reshape(num, 1)))\n",
    "    return yhat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
      "  after removing the cwd from sys.path.\n"
     ]
    }
   ],
   "source": [
    "# 导入数据\n",
    "X, Y = loadData('hw2_adaboost_train.dat')\n",
    "Xtest, Ytest = loadData('hw2_adaboost_test.dat')\n",
    "row, col = X.shape\n",
    "r, c = Xtest.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ein(g1)： 0.24\n"
     ]
    }
   ],
   "source": [
    "# Q12\n",
    "theta, index, s, alpha = ada_boost(X, Y, 1)\n",
    "Ypred = predict(X, theta, index, s, alpha)\n",
    "print('Ein(g1)：', np.sum(Ypred!=Y)/row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ein(G)： 0.0\n"
     ]
    }
   ],
   "source": [
    "# Q13\n",
    "theta, index, s, alpha = ada_boost(X, Y, 300)\n",
    "Ypred = predict(X, theta, index, s, alpha)\n",
    "print('Ein(G)：', np.sum(Ypred!=Y)/r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Q14 --- 打开上述注释项，在运行一次\n",
    "theta, index, s, alpha = ada_boost(X, Y, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Q16 \n",
    "theta, index, s, alpha = ada_boost(X, Y, 300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Eout(g1)： 0.29\n"
     ]
    }
   ],
   "source": [
    "# Q17\n",
    "theta, index, s, alpha = ada_boost(X, Y, 1)\n",
    "Ypred = predict(Xtest, theta, index, s, alpha)\n",
    "print('Eout(g1)：', np.sum(Ypred!=Ytest)/r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Eout(G)： 0.132\n"
     ]
    }
   ],
   "source": [
    "# Q18\n",
    "theta, index, s, alpha = ada_boost(X, Y, 300)\n",
    "Ypred = predict(Xtest, theta, index, s, alpha)\n",
    "print('Eout(G)：', np.sum(Ypred!=Ytest)/r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ----------- Q19-20 --------------\n",
    "# 获得对偶矩阵K\n",
    "def matK(X, X1, gamma):\n",
    "    row, col =X.shape\n",
    "    r, c = X1.shape\n",
    "    K = np.zeros((row, r))\n",
    "    for i in range(r):\n",
    "        K[:, i] = np.sum((X-X1[i:i+1, :])**2, 1)\n",
    "    K = np.exp(-gamma*K)\n",
    "    return K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
      "  after removing the cwd from sys.path.\n"
     ]
    }
   ],
   "source": [
    "# 加载数据\n",
    "X, Y = loadData('hw2_lssvm_all.dat')\n",
    "Xtrain = X[0:400, :]; Ytrain = Y[0:400, :]\n",
    "Xtest = X[400:, :]; Ytest = Y[400:, :]\n",
    "row, col = Xtest.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最小的Ein:  0.0\n",
      "最小的Eout:  0.39\n"
     ]
    }
   ],
   "source": [
    "# 测试\n",
    "gamma = [32, 2, 0.125]\n",
    "lamb = [0.001, 1, 1000]\n",
    "Ein = np.zeros((len(gamma), len(lamb)))\n",
    "Eout = np.zeros((len(gamma), len(lamb)))\n",
    "for i in range(len(gamma)):\n",
    "    K = matK(Xtrain, Xtrain, gamma[i])\n",
    "    K2 = matK(Xtrain, Xtest, gamma[i])\n",
    "    for j in range(len(lamb)):\n",
    "        beta = lin.pinv(lamb[j]*np.eye(400)+K).dot(Ytrain)\n",
    "        yhat = np.sign(K.dot(beta))\n",
    "        Ein[i, j] = np.sum(yhat != Ytrain)/400\n",
    "        yhat2 = np.sign(K2.T.dot(beta))\n",
    "        Eout[i, j] = np.sum(yhat2 != Ytest)/row\n",
    "print('最小的Ein: ', np.min(Ein))\n",
    "print('最小的Eout: ', np.min(Eout))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
